Jan-Philipp Kolb
8. Mai 2017
Die Struktur der Daten kann man sich mit einem JSON Viewer anschauen
jsonlite
install.packages("jsonlite")
library(jsonlite)
citation("jsonlite")
##
## To cite jsonlite in publications use:
##
## Jeroen Ooms (2014). The jsonlite Package: A Practical and
## Consistent Mapping Between JSON Data and R Objects.
## arXiv:1403.2805 [stat.CO] URL http://arxiv.org/abs/1403.2805.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {The jsonlite Package: A Practical and Consistent Mapping Between JSON Data and R Objects},
## author = {Jeroen Ooms},
## journal = {arXiv:1403.2805 [stat.CO]},
## year = {2014},
## url = {http://arxiv.org/abs/1403.2805},
## }
library("jsonlite")
DRINKWATER <- fromJSON("data/RomDrinkingWater.geojson")
names(DRINKWATER)[1:3]
## [1] "type" "generator" "copyright"
names(DRINKWATER)[4:5]
## [1] "timestamp" "features"
head(DRINKWATER$features)
## type id properties.@id properties.amenity properties.flow
## 1 Feature node/246574149 node/246574149 drinking_water push-button
## 2 Feature node/246574150 node/246574150 drinking_water <NA>
## 3 Feature node/246574151 node/246574151 drinking_water <NA>
## 4 Feature node/248743324 node/248743324 drinking_water <NA>
## 5 Feature node/251773348 node/251773348 drinking_water <NA>
## 6 Feature node/251773551 node/251773551 drinking_water <NA>
## properties.type properties.name properties.name:fr properties.wheelchair
## 1 nasone <NA> <NA> <NA>
## 2 <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA>
## 5 nasone <NA> <NA> <NA>
## 6 <NA> Acqua Marcia Eau potable yes
## properties.created_by properties.indoor geometry.type
## 1 <NA> <NA> Point
## 2 <NA> <NA> Point
## 3 <NA> <NA> Point
## 4 <NA> <NA> Point
## 5 <NA> <NA> Point
## 6 <NA> <NA> Point
## geometry.coordinates
## 1 12.49191, 41.89479
## 2 12.49095, 41.89489
## 3 12.48774, 41.89450
## 4 12.48773, 41.89354
## 5 12.48529, 41.88539
## 6 12.48386, 41.89332
my_repos <- fromJSON("https://api.github.com/users/japhilko/repos")
names(my_repos)
## [1] "id" "name" "full_name"
## [4] "owner" "private" "html_url"
## [7] "description" "fork" "url"
## [10] "forks_url" "keys_url" "collaborators_url"
## [13] "teams_url" "hooks_url" "issue_events_url"
## [16] "events_url" "assignees_url" "branches_url"
## [19] "tags_url" "blobs_url" "git_tags_url"
## [22] "git_refs_url" "trees_url" "statuses_url"
## [25] "languages_url" "stargazers_url" "contributors_url"
## [28] "subscribers_url" "subscription_url" "commits_url"
## [31] "git_commits_url" "comments_url" "issue_comment_url"
## [34] "contents_url" "compare_url" "merges_url"
## [37] "archive_url" "downloads_url" "issues_url"
## [40] "pulls_url" "milestones_url" "notifications_url"
## [43] "labels_url" "releases_url" "deployments_url"
## [46] "created_at" "updated_at" "pushed_at"
## [49] "git_url" "ssh_url" "clone_url"
## [52] "svn_url" "homepage" "size"
## [55] "stargazers_count" "watchers_count" "language"
## [58] "has_issues" "has_projects" "has_downloads"
## [61] "has_wiki" "has_pages" "forks_count"
## [64] "mirror_url" "open_issues_count" "forks"
## [67] "open_issues" "watchers" "default_branch"
library(jsonlite)
res <- fromJSON('http://ergast.com/api/f1/2004/1/results.json')
drivers <- res$MRData$RaceTable$Races$Results[[1]]$Driver
colnames(drivers)
## [1] "driverId" "code" "url" "givenName"
## [5] "familyName" "dateOfBirth" "nationality" "permanentNumber"
article_key <- "&api-key=c2fede7bd9aea57c898f538e5ec0a1ee:6:68700045"
url <- "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=obamacare+socialism"
req <- fromJSON(paste0(url, article_key))
articles <- req$response$docs
colnames(articles)
## [1] "web_url" "snippet" "lead_paragraph"
## [4] "abstract" "print_page" "blog"
## [7] "source" "multimedia" "headline"
## [10] "keywords" "pub_date" "document_type"
## [13] "news_desk" "section_name" "subsection_name"
## [16] "byline" "type_of_material" "_id"
## [19] "word_count" "slideshow_credits"
XML Paket
library(XML)
citation("XML")
##
## To cite package 'XML' in publications use:
##
## Duncan Temple Lang and the CRAN Team (2016). XML: Tools for
## Parsing and Generating XML Within R and S-Plus. R package
## version 3.98-1.5. https://CRAN.R-project.org/package=XML
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {XML: Tools for Parsing and Generating XML Within R and S-Plus},
## author = {Duncan Temple Lang and the CRAN Team},
## year = {2016},
## note = {R package version 3.98-1.5},
## url = {https://CRAN.R-project.org/package=XML},
## }
##
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
xml2 Paket
install.packages("xml2")
library(xml2)
citation("xml2")
##
## To cite package 'xml2' in publications use:
##
## Hadley Wickham and James Hester (2016). xml2: Parse XML. R
## package version 1.0.0. https://CRAN.R-project.org/package=xml2
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {xml2: Parse XML},
## author = {Hadley Wickham and James Hester},
## year = {2016},
## note = {R package version 1.0.0},
## url = {https://CRAN.R-project.org/package=xml2},
## }
# URL of OSM relation 62422; kept on a single line so that no line break
# ends up inside the string that is sent to the API.
url <- "http://api.openstreetmap.org/api/0.6/relation/62422"
library(xml2)
# xmlParse(), xmlRoot() and xmlSize() used below are functions of the XML
# package, not of xml2, so XML has to be attached as well.
library(XML)
# Download and parse the XML document behind the URL.
BE <- xmlParse(url)
Administrative Grenzen Berlin
# Top-level node of the parsed document.
xmltop <- xmlRoot(BE)
class(xmltop)
## [1] "XMLInternalElementNode" "XMLInternalNode"
## [3] "XMLAbstractNode"
# Number of children of the root node.
xmlSize(xmltop)
## [1] 1
# Number of children of the root's first child.
xmlSize(xmltop[[1]])
## [1] 328
XPath, the XML Path Language, is a query language for selecting nodes from an XML document.
xpathApply(BE,"//tag[@k = 'source:population']")
## [[1]]
## <tag k="source:population" v="http://www.statistik-berlin-brandenburg.de/Publikationen/Stat_Berichte/2010/SB_A1-1_A2-4_q01-10_BE.pdf 2010-10-01"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
url2 <- "http://api.openstreetmap.org/api/0.6/node/2923760808"
RennesBa <- xmlParse(url2)
url3 <- "http://api.openstreetmap.org/api/0.6/way/72799743"
MadCalle <- xmlParse(url3)
Logo Overpass API
The Overpass API is a read-only API that serves up custom selected parts of the OSM map data.
Spielplätze Mannheim
Export Rohdaten
# Fixed first part of the Overpass request. It is assembled from two string
# pieces so that the source can be wrapped without putting a line break
# inside the URL itself.
Link1 <- paste0(
  "http://www.overpass-api.de/api/interpreter?",
  "data=[maxsize:1073741824][timeout:900];area[name=\""
)
library(XML)
# Variable parts of the query: area name, OSM element type and tag filter.
place <- "Mannheim"
type_obj <- "node"
object <- "leisure=playground"
# Append the variable parts to the base URL and parse the XML response.
InfoList <- xmlParse(paste0(
  Link1, place, "\"];",
  type_obj, "(area)[", object, "];out;"
))
Spielplätze in Mannheim
Die Liste der IDs mit dem Wert playground:
# id attribute of every node that has a tag with the value 'playground'.
node_id <- xpathApply(InfoList,
  "//tag[@v= 'playground']/parent::node/@ id")
## node_id[[1]]
Erste node id
# lat attribute of the same nodes.
lat_x <- xpathApply(InfoList,
  "//tag[@v= 'playground']/parent::node/@ lat")
# lat_x[[1]];lat_x[[2]]
# lon attribute of the same nodes. It is stored under its own name so that
# the latitudes in lat_x are not overwritten.
lon_x <- xpathApply(InfoList,
  "//tag[@v= 'playground']/parent::node/@ lon")
Latitude Koordinate
library(devtools)
install_github("Japhilko/gosmd")
library(gosmd)
## Loading required package: maptools
## Loading required package: sp
## Checking rgeos availability: TRUE
## Loading required package: RJSONIO
##
## Attaching package: 'RJSONIO'
## The following objects are masked from 'package:jsonlite':
##
## fromJSON, toJSON
## Loading required package: stringr
pg_MA <- get_osm_nodes(object="leisure=playground",
"Mannheim")
info <- extract_osm_nodes(OSM.Data=pg_MA,
value="playground")
|  | leisure | lat | lon | note |
|---|---|---|---|---|
| 30560755 | playground | 49.51910 | 8.502807 | NA |
| 76468450 | playground | 49.49633 | 8.539396 | Rutsche, Schaukel, großer Sandkasten, Tischtennis |
| 76468534 | playground | 49.49678 | 8.552959 | NA |
| 76468535 | playground | 49.49230 | 8.548750 | NA |
| 76468536 | playground | 49.50243 | 8.548140 | Schaukel, Rutsche, Sandkasten, Spielhäuser, Tischtennis |
| 76468558 | playground | 49.49759 | 8.542036 | NA |
http://www.stat.berkeley.edu/~statcur/Workshop2/Presentations/XML.pdf
http://www.di.fc.ul.pt/~jpn/r/web/index.html#parsing-xml
http://www.w3schools.com/xml/xquery_intro.asp
http://giventhedata.blogspot.de/2012/06/r-and-web-for-beginners-part-ii-xml-in.html
http://gastonsanchez.com/Handling_and_Processing_Strings_in_R.pdf
XML - Gaston Sanchez
library("XML")
Gaston Sanchez - Dataflow
Seine Arbeit sieht man hier.
Gaston Sanchez - Webdaten bekommen
| Function | Description |
|---|---|
| xmlName() | name of the node |
| xmlSize() | number of subnodes |
| xmlAttrs() | named character vector of all attributes |
| xmlGetAttr() | value of a single attribute |
| xmlValue() | contents of a leaf node |
| xmlParent() | name of parent node |
| xmlAncestors() | name of ancestor nodes |
| getSibling() | siblings to the right or to the left |
| xmlNamespace() | the namespace (if there’s one) |
Administrative Grenzen für Deutschland
url <- "http://api.openstreetmap.org/api/0.6/relation/62422"
BE <- xmlParse(url)
Administrative Grenzen Berlin
xmltop = xmlRoot(BE)
class(xmltop)
## [1] "XMLInternalElementNode" "XMLInternalNode"
## [3] "XMLAbstractNode"
xmlSize(xmltop)
## [1] 1
xmlSize(xmltop[[1]])
## [1] 328
XPath, the XML Path Language, is a query language for selecting nodes from an XML document.
xpathApply(BE,"//tag[@k = 'population']")
## [[1]]
## <tag k="population" v="3440441"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
xpathApply(BE,"//tag[@k = 'source:population']")
## [[1]]
## <tag k="source:population" v="http://www.statistik-berlin-brandenburg.de/Publikationen/Stat_Berichte/2010/SB_A1-1_A2-4_q01-10_BE.pdf 2010-10-01"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
xpathApply(BE,"//tag[@k = 'name:ta']")
## [[1]]
## <tag k="name:ta" v="<U+0BAA><U+0BC6><U+0BB0><U+0BCD><U+0BB2><U+0BBF><U+0BA9><U+0BCD>"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
region <- xpathApply(BE,
"//tag[@k = 'geographical_region']")
# regular expressions
region[[1]]
## <tag k="geographical_region" v="Barnim;Berliner Urstromtal;Teltow;Nauener Platte"/>
<tag k="geographical_region"
v="Barnim;Berliner Urstromtal;
Teltow;Nauener Platte"/>
Barnim
url2<-"http://api.openstreetmap.org/api/0.6/node/25113879"
obj2<-xmlParse(url2)
obj_amenity<-xpathApply(obj2,"//tag[@k = 'amenity']")[[1]]
obj_amenity
## <tag k="amenity" v="university"/>
xpathApply(obj2,"//tag[@k = 'wikipedia']")[[1]]
## <tag k="wikipedia" v="de:Universität Mannheim"/>
xpathApply(obj2,"//tag[@k = 'wheelchair']")[[1]]
xpathApply(obj2,"//tag[@k = 'name']")[[1]]
url3<-"http://api.openstreetmap.org/api/0.6/node/303550876"
obj3 <- xmlParse(url3)
xpathApply(obj3,"//tag[@k = 'opening_hours']")[[1]]
## <tag k="opening_hours" v="Mo-Sa 09:00-20:00; Su,PH off"/>
url4<-"http://api.openstreetmap.org/api/0.6/node/25439439"
obj4 <- xmlParse(url4)
xpathApply(obj4,"//tag[@k = 'railway:station_category']")[[1]]
## <tag k="railway:station_category" v="2"/>
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:XML':
##
## xml
bhfkat<-read_html(
"https://de.wikipedia.org/wiki/Bahnhofskategorie")
df_html_bhfkat<-html_table(
html_nodes(bhfkat, "table")[[1]],fill = TRUE)
| Stufe | Bahnsteigkanten | Bahnsteiglänge | Reisende/Tag | Zughalte/Tag |
|---|---|---|---|---|
| 6 | 01 | > 000 bis 090 m | 00000 bis 00049 | 000 bis 0010 |
| 5 | 02 | > 090 bis 140 m | 00050 bis 00299 | 011 bis 0050 |
| 4 | 03 bis 04 | > 140 bis 170 m | 00300 bis 00999 | 051 bis 0100 |
| 3 | 05 bis 09 | > 170 bis 210 m | 01000 bis 09999 | 101 bis 0500 |
| 2 | 10 bis 14 | > 210 bis 280 m | 10.000 bis 49.999 | 501 bis 1000 |
| 1 | 00i ab 15 | > 280 m | 00000i ab 50.000 | 000i ab 1001 |
url5<-"http://api.openstreetmap.org/api/0.6/way/162149882"
obj5<-xmlParse(url5)
xpathApply(obj5,"//tag[@k = 'name']")[[1]]
## <tag k="name" v="City-Airport Mannheim"/>
xpathApply(obj5,"//tag[@k = 'website']")[[1]]
## <tag k="website" v="http://www.flugplatz-mannheim.de/"/>
xpathApply(obj5,"//tag[@k = 'iata']")[[1]]
## <tag k="iata" v="MHG"/>
Deborah Nolan - Extracting data from XML
Duncan Temple Lang - A Short Introduction to the XML package for R
Noch mehr Informationen
citation("XML")
##
## To cite package 'XML' in publications use:
##
## Duncan Temple Lang and the CRAN Team (2016). XML: Tools for
## Parsing and Generating XML Within R and S-Plus. R package
## version 3.98-1.5. https://CRAN.R-project.org/package=XML
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {XML: Tools for Parsing and Generating XML Within R and S-Plus},
## author = {Duncan Temple Lang and the CRAN Team},
## year = {2016},
## note = {R package version 3.98-1.5},
## url = {https://CRAN.R-project.org/package=XML},
## }
##
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
citation("xml2")
##
## To cite package 'xml2' in publications use:
##
## Hadley Wickham and James Hester (2016). xml2: Parse XML. R
## package version 1.0.0. https://CRAN.R-project.org/package=xml2
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {xml2: Parse XML},
## author = {Hadley Wickham and James Hester},
## year = {2016},
## note = {R package version 1.0.0},
## url = {https://CRAN.R-project.org/package=xml2},
## }
citation("XML")
##
## To cite package 'XML' in publications use:
##
## Duncan Temple Lang and the CRAN Team (2016). XML: Tools for
## Parsing and Generating XML Within R and S-Plus. R package
## version 3.98-1.5. https://CRAN.R-project.org/package=XML
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {XML: Tools for Parsing and Generating XML Within R and S-Plus},
## author = {Duncan Temple Lang and the CRAN Team},
## year = {2016},
## note = {R package version 3.98-1.5},
## url = {https://CRAN.R-project.org/package=XML},
## }
##
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.